package com.GetEmail;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;

/**
 * @description:todo
 * @auth yangzhiwei
 * @date 2021-07-25 23:23
 */
import java.io.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Command-line scraper that harvests e-mail addresses from journal abstract pages.
 *
 * <p>For every URL listed (one per line) in {@link #inputFile} the page is downloaded,
 * each {@code <a>} link on it is examined, links that look like abstract pages are
 * downloaded in turn, and every e-mail address found in their HTML is collected.
 * The collected addresses are printed to standard output and written, one per line,
 * to {@link #EMAIL_OUTPUT}.
 */
public class WebPageSource2 {

    /** Text file holding the index-page URLs to crawl, one URL per line. */
    private static final String inputFile = "E:\\input.txt";

    /** Not referenced by any code in this class; kept as declared. */
    private static final String OUTPATH = "E:\\url.txt";

    /** File that receives the harvested addresses, one per line. */
    private static final String EMAIL_OUTPUT = "E://email.txt";

    /** Replacement for every {@code "../"} segment found in a link's href. */
    private static final String RELATIVE_LINK_BASE = "http://www.yxxb.com.cn:8081/aps/CN/";

    /**
     * Matches addresses ending in {@code .com.cn}, {@code .edu.com}, {@code .edu.cn},
     * {@code .com} or {@code .cn}. Longer suffixes are listed first so that
     * {@code a@b.com.cn} is not cut short to {@code a@b.com}. The trailing word boundary
     * replaces a stray literal space the pattern used to require after {@code .com}/{@code .cn},
     * which dropped addresses followed by markup and left a space in the captured text.
     */
    private final static String regex = "\\w+@\\w+\\.(?:com\\.cn|edu\\.(?:com|cn)|com|cn)\\b";
    private final static Pattern emailer = Pattern.compile(regex);

    /**
     * Runs the crawl described on the class.
     *
     * @param args ignored
     * @throws IOException if the input file cannot be read, an index page cannot be
     *                     downloaded, or the output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        final ArrayList<String> result = new ArrayList<>();

        // try-with-resources guarantees the input file handle is released.
        try (BufferedReader br = new BufferedReader(new FileReader(new File(inputFile)))) {
            String url;
            while ((url = br.readLine()) != null) {
                url = url.trim();
                if (url.isEmpty()) {
                    // A blank line (e.g. a trailing newline) is not a URL; skip it
                    // instead of letting the HTTP client reject it.
                    continue;
                }

                // Issue a GET request for the index page and gather all of its <a> tags.
                Document doc = Jsoup.connect(url).get();
                Elements as = doc.getElementsByTag("a");
                as.forEach(element -> collectEmails(element.select("a").attr("href"), result));
            }
        }

        System.out.println(result);

        // Closing the writer flushes its buffer; without it short outputs never reach disk.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(EMAIL_OUTPUT))) {
            for (String email : result) {
                bw.write(email);
                bw.newLine();
            }
        }
    }

    /**
     * Resolves one link, and if it points at an abstract page, downloads that page and
     * appends every e-mail address found in its HTML to {@code sink}.
     *
     * <p>A page that fails to download is reported and skipped so that one bad link
     * does not abort the whole crawl.
     *
     * @param href the raw {@code href} attribute value of a link
     * @param sink list that receives the addresses found
     */
    private static void collectEmails(String href, ArrayList<String> sink) {
        String qikanUrl = href;
        if (qikanUrl.contains("../")) {
            qikanUrl = qikanUrl.replace("../", RELATIVE_LINK_BASE);
        }
        if (!isAbstractLink(qikanUrl)) {
            return;
        }

        System.out.println(qikanUrl);
        final Document qikan;
        try {
            // Issue a GET request for the abstract page.
            qikan = Jsoup.connect(qikanUrl).get();
        } catch (IOException e) {
            e.printStackTrace();
            return; // nothing was downloaded, so there is nothing to scan
        }

        Matcher matcher = emailer.matcher(qikan.outerHtml());
        while (matcher.find()) {
            String email = matcher.group();
            System.out.println(email);
            sink.add(email);
        }
    }

    /**
     * Tells whether a resolved link should be followed: it must contain {@code "http"},
     * {@code "abstract"} and {@code "ml"}, and must not contain {@code "zip"},
     * {@code "current"}, {@code "col"} or {@code "auth"}.
     *
     * @param link the link after relative-path substitution
     * @return {@code true} if the link passes every substring check
     */
    private static boolean isAbstractLink(String link) {
        return link.contains("http")
                && link.contains("abstract")
                && link.contains("ml")
                && !link.contains("zip")
                && !link.contains("current")
                && !link.contains("col")
                && !link.contains("auth");
    }
}
